library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(here)
## here() starts at /Users/feliciacruz/Documents/MEDS/Winter_22/EDS_240/data-viz-final
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the crown-of-thorns (COTS) survey counts from the project root and
# convert the column names to snake_case so later code can use `site`,
# `year` and `cots`.
cots <- clean_names(read_csv(here("cots.csv")))
## Rows: 1008 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Site, Habitat
## dbl (3): Year, Transect, COTS
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Store survey year as a factor so it is handled as a discrete category
# rather than a continuous number.
cots[["year"]] <- as.factor(cots[["year"]])
# Total sea star count for every site/year combination.
# `.groups = "drop"` returns an ungrouped tibble, so no leftover grouping by
# `site` leaks into later steps and the "grouped output" message is not emitted.
cots_summary <- cots %>%
  group_by(site, year) %>%
  summarize(total = sum(cots), .groups = "drop")
## `summarise()` has grouped output by 'site'. You can override using the `.groups` argument.
# make totals df
# Collapse the per-site totals into one overall count per year.
totals <- summarize(
  group_by(cots_summary, year),
  total_count = sum(total)
)
# Stacked bar chart of yearly sea star totals, one fill colour per site.
g <- ggplot(cots_summary, aes(as.factor(year), total, fill = site)) +
  geom_col(position = "stack") +
  scale_fill_brewer(palette = "Set3") +
  scale_y_continuous(breaks = seq(0, 150, by = 25)) +
  labs(
    x = "year",
    fill = "Site",
    title = "Crown of Thorns Sea Stars",
    subtitle = "Moorea, French Polynesia (2005-2018)"
  ) +
  theme_bw() +
  # Tilt the year labels and remove the vertical grid lines.
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Render the static version of the chart.
g
# Interactive version of `g`; the hover tooltip is limited to `total` and
# `site`. layout() sets the title text, with the subtitle placed on a second
# line inside an HTML <sup> tag.
layout(
  ggplotly(g, tooltip = c("total", "site")),
  title = list(
    text = paste0(
      "Crown of Thorns Sea Stars",
      "<br>",
      "<sup>",
      "Moorea, French Polynesia (2005-2018)",
      "</sup>"
    )
  )
)
# Same stacked bar chart as `g`, with the yearly total printed above each bar.
# It is built on top of `g` instead of repeating the full plot specification,
# so the two charts cannot drift apart.
g_2 <- g +
  geom_text(
    # The summary stat sums `total` across sites within each year;
    # `after_stat(y)` exposes that computed sum as the label. It replaces the
    # deprecated `stat(y)` helper.
    aes(label = after_stat(y), group = year),
    stat = "summary", fun = sum, vjust = -1, nudge_y = 2
  )
# Interactive version of `g_2`; the hover tooltip is limited to `total` and
# `site`. layout() sets the title text, with the subtitle placed on a second
# line inside an HTML <sup> tag.
layout(
  ggplotly(g_2, tooltip = c("total", "site")),
  title = list(
    text = paste0(
      "Crown of Thorns Sea Stars",
      "<br>",
      "<sup>",
      "Moorea, French Polynesia (2005-2018)",
      "</sup>"
    )
  )
)
testing
# total coral line graph?
# Load the coral percent-cover records and split `date` into `year` and
# `month`. The last line of the CSV is a "(285403 rows)" footer, not data.
# Only rows whose date starts with a four-digit year and a dash are kept, so
# the footer cannot appear as a bogus year or make separate() fill in a
# missing piece with NA.
coral <- read_csv(here("coral_pop_since_05.csv")) %>%
  clean_names() %>%
  dplyr::filter(str_detect(date, "^\\d{4}-")) %>%
  as.data.frame() %>%
  separate(date, c("year", "month"), "-")
## Rows: 285404 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Date, Location, Site, Habitat, Taxonomy / Substrate / Functional Group
## dbl (3): Transect, Quadrat, Percent Cover
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [285404].
# Print the mean percent cover for each year.
summarize(
  group_by(coral, year),
  mean(percent_cover)
)
## # A tibble: 16 × 2
## year `mean(percent_cover)`
## <chr> <dbl>
## 1 (285403 rows) NA
## 2 2005 18.0
## 3 2006 16.3
## 4 2007 3.49
## 5 2008 3.70
## 6 2009 3.70
## 7 2010 3.63
## 8 2011 3.45
## 9 2012 3.45
## 10 2013 3.23
## 11 2014 2.78
## 12 2015 2.78
## 13 2016 3.04
## 14 2017 3.13
## 15 2018 3.12
## 16 2019 3.12
# explore macroalgae
# Load the benthic algae records; column names are kept as they are in the file.
algae <- here("benthic_algae.csv") %>%
  read_csv()
## Rows: 187522 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Year, Location, Site, Habitat, Taxonomy_Substrate_Functional_Group
## dbl (3): Transect, Quadrat, Percent_Cover
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Group by site and year, then sum the total observations and divide by 1000 to get the annual site mean density.
# Per-site, per-year density: the summed counts divided by 1000.
# NOTE(review): the 1000 divisor is taken from the original note and is
# presumably the surveyed area; confirm the units.
# `.groups = "drop"` returns an ungrouped tibble and avoids the
# "grouped output" message.
cots_density <- cots %>%
  group_by(site, year) %>%
  summarize(
    density = (sum(cots) / 1000), # this should give annual site mean density
    .groups = "drop"
  )
## `summarise()` has grouped output by 'site'. You can override using the `.groups` argument.
Plot the density.
# One panel per site, showing annual density as points joined by a line.
ggplot(cots_density, aes(year, density, color = site)) +
  geom_point() +
  # `year` is a factor, so the line needs an explicit group to connect the
  # points within each site.
  geom_line(aes(group = site)) +
  facet_wrap(~site) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 80, hjust = 1)) +
  labs(
    subtitle = "Moorea, French Polynesia (2005 - 2018)",
    title = "Crown of Thorns Sea Stars - Annual Site Means"
  )
Testing
From Figure 1, we can see